#### Visualize distribution of continuous DVs ####
# Build one vote-share histogram with the styling shared by every panel of the
# figure: black bars, Times New Roman text, no panel border or grid lines, and
# black axis lines.
#
# data     data frame passed to ggplot()
# mapping  aesthetic mapping created with aes(); supplies the x variable
# title    panel title
# x_label  x-axis label (blank by default)
# y_label  y-axis label (blank by default)
#
# Returns the ggplot object.
dv_histogram <- function(data, mapping, title, x_label = "", y_label = "") {
  ggplot(data, mapping) +
    # `fill` is a layer parameter: handed to ggplot() it is silently dropped,
    # so it has to be set on the histogram layer to take effect.
    geom_histogram(fill = "black") +
    xlab(x_label) +
    ylab(y_label) +
    ggtitle(title) +
    theme_bw() +
    theme(
      text = element_text(family = "Times New Roman"),
      panel.border = element_blank(),
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank(),
      axis.line = element_line(colour = "black"),
      title = element_text(color = "black"),
      plot.caption = element_text(hjust = 0)
    )
}

ced_dv <- dv_histogram(ced,
                       aes(x = percent),
                       title = "Local California Elections",
                       x_label = "% vote share",
                       y_label = "Frequency")

senate_dv_primary <- dv_histogram(primary,
                                  aes(x = ppct),
                                  title = "Senate primary")

senate_dv_general <- dv_histogram(general,
                                  aes(x = gpct),
                                  title = "Senate general")

# Combine the three panels into a single figure.
t <- ggarrange(ced_dv, senate_dv_primary, senate_dv_general)

# Name the figure to save explicitly. Without `plot`, ggsave() falls back to
# last_plot(), which is not guaranteed to be the arranged figure.
ggsave("Figures/DistributionOfVoteShares.png",
       plot = t,
       width = 6,
       height = 4,
       units = "in",
       device = "png",
       dpi = 300)


#### Summary stats for CED model variables ####
# Assemble the CED model variables for the descriptive-statistics table.
# Office indicators are 1 when `office` matches, 0 when it does not, and NA
# when `office` is missing. The columns are then kept, in the same order as
# before, under the labels printed in the table.
t <- ced %>%
  mutate(
    `City Council Candidate` = as.numeric(office == "CITY COUNCIL"),
    `School Board Candidate` = as.numeric(office == "SCHOOL BOARD MEMBER")
  ) %>%
  select(
    `Vote Share` = percent,
    `Surname Pronounceability` = last_algorithm,
    `First Name Pronounceability` = first_algorithm,
    `Last Name Common.` = percent_freq,
    `First Name Common.` = fprop,
    White = white,
    Incumbent = incumbent,
    Female = female,
    `Surname Length` = lchars,
    `First Name Length` = fchars,
    `Total votes in election/1000` = totvotes1000,
    `City Council Candidate`,
    `School Board Candidate`,
    `Open seats/competitors` = seats_comps
  )

# Write the summary-statistics table as LaTeX.
# NOTE(review): this path uses lowercase "tables/" while the Senate tables in
# this file write to "Tables/"; on a case-sensitive file system these are
# different directories -- confirm which one exists.
stargazer(
  as.data.frame(t),
  type = "latex",
  style = "ajps",
  title = "Descriptive Statistics for California Local Elections",
  label = "tab:ced-sumstats",
  out = "tables/ced-sumstats.tex"
)

#### Summary stats for primary congressional model variables ####
# Assemble the primary-election model variables for the descriptive-statistics
# table. Chamber and party indicators are 1 on a match, 0 on a non-match, and
# NA when the source column is missing. The columns are then kept, in the same
# order as before, under the labels printed in the table.
t <- primary %>%
  mutate(
    `Senate Candidate` = as.numeric(race == "Senate"),
    `House Candidate` = as.numeric(race == "House"),
    Republican = as.numeric(party == "R"),
    Democrat = as.numeric(party == "D")
  ) %>%
  select(
    `Vote Share` = ppct,
    `Surname Pronounceability` = last_algorithm,
    `First Name Pronounceability` = first_algorithm,
    `Last Name Common.` = percent_freq,
    `First Name Common.` = fprop,
    White = white,
    Incumbent = incumbent,
    Female = female,
    `Surname Length` = lchars,
    `First Name Length` = fchars,
    `Senate Candidate`,
    `House Candidate`,
    `Number of Primary Competitors` = num_prim_opps,
    Republican,
    Democrat
  )

# Write the summary-statistics table as LaTeX.
# NOTE(review): the title, label and file name say "Senate", yet the table
# includes a House Candidate indicator -- confirm the wording.
stargazer(
  as.data.frame(t),
  type = "latex",
  style = "ajps",
  title = "Descriptive Statistics for Primary Senate Elections",
  label = "tab:senate-primary-sumstats",
  out = "Tables/senate-primary-sumstats.tex"
)

#### Summary stats for general congressional model variables ####
# Assemble the general-election model variables for the descriptive-statistics
# table. Chamber and party indicators are 1 on a match, 0 on a non-match, and
# NA when the source column is missing. The columns are then kept, in the same
# order as before, under the labels printed in the table.
t <- general %>%
  mutate(
    `Senate Candidate` = as.numeric(race == "Senate"),
    `House Candidate` = as.numeric(race == "House"),
    Republican = as.numeric(party == "R"),
    Democrat = as.numeric(party == "D")
  ) %>%
  select(
    `Vote Share` = gpct,
    `Surname Pronounceability` = last_algorithm,
    `First Name Pronounceability` = first_algorithm,
    `Last Name Common.` = percent_freq,
    `First Name Common.` = fprop,
    White = white,
    Incumbent = incumbent,
    Female = female,
    `Surname Length` = lchars,
    `First Name Length` = fchars,
    `Senate Candidate`,
    `House Candidate`,
    Republican,
    Democrat
  )

# Write the summary-statistics table as LaTeX.
# NOTE(review): the title, label and file name say "Senate", yet the table
# includes a House Candidate indicator -- confirm the wording.
stargazer(
  as.data.frame(t),
  type = "latex",
  style = "ajps",
  title = "Descriptive Statistics for General Senate Elections",
  label = "tab:senate-general-sumstats",
  out = "Tables/senate-general-sumstats.tex"
)


#### Correlations between fluency variables ####
# Write a LaTeX table of pairwise correlations among the four name-fluency
# measures.
#
# data      data frame containing first_algorithm, last_algorithm, fprop and
#           percent_freq
# title     election description appended to the table caption
# filename  suffix used in the table label ("cor-table-<filename>") and in the
#           output path ("tables/corr-table-<filename>.tex")
#
# Returns the value of save_kable(); called for the file it writes.
cor_table <- function(data,
                      title,
                      filename) {
  # Keep only the fluency measures, under the labels shown in the table.
  out <- data %>%
    select(`First name pronounceability` = first_algorithm,
           `Surname pronounceability` = last_algorithm,
           `First name commonality` = fprop,
           `Surname commonality` = percent_freq)

  # Correlate, shave the matrix, and blank the header of the label column.
  # rename() is namespaced like the other rename() calls in this file.
  out <- out %>%
    correlate(use = "complete.obs") %>%
    shave() %>%
    mutate_if(is.character, ~replace_na(., "")) %>%
    dplyr::rename(` ` = term)

  # Missing cells are printed as "X". This is set as a global option, so it
  # stays in effect after the function returns (behaviour kept as before).
  options(knitr.kable.NA = "X")

  out %>%
    kable(format = "latex",
          booktabs = TRUE,
          escape = FALSE,
          label = paste0("cor-table-", filename),
          # Trailing space after "in": the caption previously ran straight
          # into the title ("... Measures inGeneral ...").
          caption = paste0("Correlations Between Name Fluency Measures in ",
                           title),
          digits = 3) %>%
    kable_styling(latex_options = "scale_down") %>%
    save_kable(file = paste0("tables/corr-table-", filename, ".tex"),
               self_contained = TRUE)
}

# One correlation table per sample: general, primary, then local elections.
cor_table(data = general,
          title = "General Congressional Elections",
          filename = "general")

cor_table(data = primary,
          title = "Primary Congressional Elections",
          filename = "primary")

cor_table(data = ced,
          title = "Local California Elections",
          filename = "ced")


# Correlation matrix of the four fluency measures in the primary sample.
# NOTE(review): `t` is reassigned below before this matrix is used, so only
# the general-election matrix reaches corrplot() -- confirm that is intended.
primary_corrs <- select(primary,
                        first_algorithm,
                        last_algorithm,
                        fprop,
                        percent_freq)
t <- cor(primary_corrs, use = "complete.obs")

# Same four measures in the general-election sample.
general_corrs <- select(general,
                        first_algorithm,
                        last_algorithm,
                        fprop,
                        percent_freq)
t <- cor(general_corrs, use = "complete.obs")

# Plot the upper triangle of the general-election correlation matrix.
corrplot::corrplot(t, type = "upper")
